load("../outputs/summary_phenotypes.Rdata")
mu <- mus[[1]]
##add primary and secrodary variable information
summs <- read.table("../outputs/phenotype_variable_summary.txt", header = T, stringsAsFactors = F)
#summs$Primary_Variable <- gsub("_Proband", "", summs$Primary_Variable)
#summs$Secondary_Variables <- gsub("_Proband", "", summs$Secondary_Variables)
#write.table(summs, file = "../outputs/phenotype_variable_summary.txt", col.names = T, row.names = F, quote = F, sep = "\t")
summs2 <- melt(summs, id = c("Construct", "Construct_Descriptioin"))
names(summs2)[2:4] <- c("Construct Descriptioin", "Interests", "Variable")
pro_summs <- merge(x = summs2, y = mu, by = "Variable", all.x = T, all.y = T)
pro_summs1 <- pro_summs[!is.na(pro_summs$Variable),]
pro_summs2 <- pro_summs1[!duplicated(pro_summs1$Variable),]
datatable(pro_summs2, rownames = FALSE, filter="top", options = list(pageLength = 5, scrollX=T) )
mu2 <- mus[[2]]
datatable(mu2, rownames = FALSE, filter="top", options = list(pageLength = 5, scrollX=T) )
mu3 <- mus[[3]]
datatable(mu3, rownames = FALSE, filter="top", options = list(pageLength = 5, scrollX=T) )
grid.raster(readPNG("figures/ssc_probands_phe.png"))
grid.raster(readPNG("figures/ssc_unaffSibs_phe.png"))
grid.raster(readPNG("figures/ssc_otherSibs_phe.png"))
sex <- read.table("../outputs/sexcheck.txt", header = T, stringsAsFactors = F)
res_sex1 <- sex[sex$chip == "UCSF_1Mv3",]
res_sex2 <- sex[sex$chip == "UCSF_1Mv1",]
res_sex3 <- sex[sex$chip == "UCSF_Omni2.5",]
fams <- read.table("../outputs/fams_covarites.txt", stringsAsFactors = F)
fam1 <- fams[fams$V7 == "1Mv3",]
fam2 <- fams[fams$V7 == "1Mv1",]
fam3 <- fams[fams$V7 == "Omni2.5",]
##confirm with other phenotype file
pros <- sex[sex$STATUS == "PROBLEM",]
pros$RRB <- "NA"
pros[pros$IID %in% fams$V2,]$RRB <- fams[match(pros[pros$IID %in% fams$V2,]$IID, fams$V2), "V8"]
datatable(pros, rownames = FALSE, filter="top", caption = htmltools::tags$caption(
style = 'caption-side: top; text-align: center; color:black;
font-size:200% ;',"Individuals with discordant sex information"), options = list(pageLength = 5, scrollX=T) )
grid.raster(readPNG("figures/sex_1mv3.png"))
grid.raster(readPNG("figures/sex_1mv1.png"))
grid.raster(readPNG("figures/sex_omni.png"))
gens <- read.table("../outputs/pruned_IBD.txt", header = T, stringsAsFactors = F)
gen_ibd1 <- gens[gens$chip == "UCSF_1Mv3",]
gen_ibd2 <- gens[gens$chip == "UCSF_1Mv1",]
gen_ibd3 <- gens[gens$chip == "UCSF_Omni2.5",]
grid.raster(readPNG("figures/ibd_1mv3.png"))
grid.raster(readPNG("figures/ibd_1mv1.png"))
grid.raster(readPNG("figures/ibd_omni.png"))
grid.raster(readPNG("figures/ibd_sexF_1mv3.png"))
grid.raster(readPNG("figures/ibd_sexF_1mv1.png"))
grid.raster(readPNG("figures/ibd_sexF_omni.png"))
het <- read.table("../outputs/qcd_autosome_pruned_het.txt", header = T, stringsAsFactors = F)
het$Het_mean <- (het$N.NM. - het$O.HOM.)/het$N.NM.
imiss <- read.table("../outputs/allchrs_flip2_imiss.txt", header = T, stringsAsFactors = F)
het_imiss <- merge(imiss, het, by=c("FID", "IID", "chip"))
het_imiss[het_imiss$chip == "UCSF_1Mv1",1] <- "Illumina 1Mv1"
het_imiss[het_imiss$chip == "UCSF_1Mv3",1] <- "Illumina 1Mv3"
het_imiss[het_imiss$chip == "UCSF_Omni2.5",1] <- "Illumina Omni2.5M"
mu <- ddply(het_imiss, "chip", summarise, Het_mean=mean(Het_mean), F_mean=mean(F))
sds <-ddply(het_imiss, "chip", summarise, Het_sd=sd(Het_mean), F_sd=sd(F))
mus <- merge(mu, sds, by = "chip")
hetTh <- 3
fail_het <- het[het$F < (mean(het$F) - hetTh*sd(het$F)) |
het$F > (mean(het$F) + hetTh*sd(het$F)),] #367 individuals for 3SD and 35 for 5 SD.
p_het <- ggplot() + geom_point(data=het_imiss, aes_string(x='F_MISS', y='Het_mean')) +
xlab("Proportion of missing SNPs") +
ylab("Mean heterozygosity") +
theme_bw() +
theme(
text = element_text(size=12, face="bold"),
plot.title = element_text(color="black", size=14, hjust = 0.5,vjust = 1.2)
) +
ggtitle("Genome-wide mean heterozygosity by Missingness across samples") +
geom_hline(data = mus, aes(yintercept=Het_mean + 5 * Het_sd), lty=2,
col="#e7298a") +
geom_hline(data = mus, aes(yintercept=Het_mean - 5 * Het_sd), lty=2,
col="#e7298a") +
geom_hline(data = mus, aes(yintercept=Het_mean + 3 * Het_sd), lty=2,
col="azure4") +
geom_hline(data = mus, aes(yintercept=Het_mean - 3 * Het_sd), lty=2,
col="azure4") +
facet_wrap(~chip, nrow =1) +
geom_vline(xintercept=0.1, col="azure4", lty=2)
p_F <- ggplot() + geom_point(data=het_imiss, aes_string(x='F_MISS', y='F')) +
xlab("Proportion of missing SNPs") +
ylab("Inbreeding Coefficient (F)") +
theme_bw() +
theme(
text = element_text(size=12, face="bold"),
plot.title = element_text(color="black", size=14, hjust = 0.5,vjust = 1.2)
) +
ggtitle("Inbreeding Coefficient (F) by Missingness across samples") +
geom_hline(data = mus, aes(yintercept=F_mean + 5 * F_sd), lty=2,
col="#e7298a") +
geom_hline(data = mus, aes(yintercept=F_mean - 5 * F_sd), lty=2,
col="#e7298a") +
geom_hline(data = mus, aes(yintercept=F_mean + 3 * F_sd), lty=2,
col="azure4") +
geom_hline(data = mus, aes(yintercept=F_mean - 3 * F_sd), lty=2,
col="azure4") +
facet_wrap(~chip, nrow =1) +
geom_vline(xintercept=0.1, col="azure4", lty=2)
# ggplotly(p_het)
#
# ggplotly(p_F)
ggsave(file.path("figures/p_het.png"), p_het, height = 4, width = 6, dpi = 300)
ggsave(file.path("figures/p_F.png"), p_F, height = 4, width = 6, dpi = 300)
grid.raster(readPNG("figures/p_het.png"))
grid.raster(readPNG("figures/p_F.png"))
grid.raster(readPNG("figures/p_het_ibd1.png"))
grid.raster(readPNG("figures/p_het_ibd2.png"))
grid.raster(readPNG("figures/p_het_ibd3.png"))